Librerias

library("rmdformats")
library("VGAM")
## Loading required package: stats4
## Loading required package: splines
library("readr")
library("here")
## here() starts at C:/Users/azael/OneDrive/Cursos/Maestria/Estadistica inferencial/Expo Distribucion ingresos/Estudio
library("Rsolnp")
library("GB2")
## 
## Attaching package: 'GB2'
## The following object is masked from 'package:VGAM':
## 
##     fisk
library("fitdistrplus")
## Loading required package: MASS
## Loading required package: survival
library("tidyverse")
## -- Attaching packages ----------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v dplyr   1.0.2
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts -------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::fill()   masks VGAM::fill()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::select() masks MASS::select()
library("viridis")
## Loading required package: viridisLite
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Simulación datos censurados

ingresos <- rsinmad(100000, 1.14, 1.75, 2.07)
weights <- rexp(100000,0.002852401)

top_code <- 3

prop_real <- sum(ingresos>top_code)/100000
real_mean <- mean(ingresos[ingresos>top_code])

ingresos_censurado <- rep(0,100000)
ingresos_censurado[ingresos < top_code] <- ingresos[ingresos < top_code]
ingresos_censurado[ingresos >= top_code] <- top_code

Método

cutoff <- quantile(ingresos, .95)[[1]]
M = sum(ingresos_censurado>cutoff & ingresos_censurado<top_code)
Te = sum(ingresos_censurado==3)
alpha_est = M/(Te*log(top_code) + sum(log(ingresos_censurado[ingresos_censurado>=cutoff & ingresos_censurado<top_code])) - (M+Te)*log(cutoff))

invertido = alpha_est/(alpha_est-1)
estimated_mean <- top_code*invertido

## Media estimada
estimated_mean
## [1] 4.573514
## Media real
real_mean
## [1] 4.258207

Practica con la ENIGH

Cargamos datos

## ENIGH
## Homogeneizamos los nombres de las columnas de interes
concentradohogar2012 <- read_csv(here("data", "concentradohogar2012.csv"))
colnames(concentradohogar2012)[colnames(concentradohogar2012) == "factor_hog"] <- "factor"

concentradohogar2014 <- read_csv(here("data", "concentradohogar2014.csv"))
colnames(concentradohogar2014)[colnames(concentradohogar2014) == "factor_hog"] <- "factor"

concentradohogar2016 <- read_csv(here("data", "concentradohogar2016.csv"))
concentradohogar2018 <- read_csv(here("data", "concentradohogar2018.csv"))

## Datos anonimos del SAT
sat2012 <- read_delim(here("data", "RUIDO_FINAL_PF_2012.tab"), "\t", escape_double = FALSE, trim_ws = TRUE)

Histogramas

df <- tibble(ing_cor = c(concentradohogar2012$ing_cor,
                         concentradohogar2014$ing_cor,
                         concentradohogar2016$ing_cor,
                         concentradohogar2018$ing_cor),
             factor = c(concentradohogar2012$factor,
                        concentradohogar2014$factor,
                        concentradohogar2016$factor,
                        concentradohogar2018$factor),
o = c(rep("2012", length(concentradohogar2012$ing_cor)),
                     rep("2014", length(concentradohogar2014$ing_cor)),
                     rep("2016", length(concentradohogar2016$ing_cor)),
                     rep("2018", length(concentradohogar2018$ing_cor)))
              )

histograma <- ggplot(df, aes(x = ing_cor, weight = factor)) + 
  geom_histogram(bins = 60) + 
  facet_wrap(~año) +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  xlab("Ingreso Corriente") +  ylab("Frecuencia") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

histograma_zoomed <- ggplot(df, aes(x = ing_cor, weight = factor)) + 
  geom_histogram(bins = 600) + 
  facet_wrap(~año) +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  coord_cartesian(xlim=c(0,3000000)) +
  xlab("Ingreso Corriente") +  ylab("Frecuencia") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

histograma_log <- ggplot(df, aes(x = ing_cor, weight = factor)) + 
  geom_histogram(bins = 30) + 
  facet_wrap(~año) +
  scale_x_continuous(trans = "log", labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  xlab("Ingreso Corriente (log)") +  ylab("Frecuencia") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Histograma automático

histograma

Histograma con zoom

histograma_zoomed

Histograma escala logaritmica

histograma_log

Ingreso medio por percentiles

ingreso_medio_percentil <- function(datos, n_percentil) {
  # datos son las base de datos del censo, n_percentil es el numero de divisiones (e.g.: 10 retorna deciles, 100 retorna percentiles)
  
  ## Datos sorteados por ingresos trimestales corrientes más indice de los hogares representados observados
  sorted_ing <- datos %>% select(ing_cor, factor) %>%
    arrange(ing_cor) %>%
    mutate(cumulative_obs = cumsum(factor))
  ## Total de hogares representados (ver definicion de factor en documentacion de la ENIGH)
  total_hogares <- sum(datos$factor)
  
  ## Tamaño por decil truncado para no incluir decimales
  tam_decile <- trunc(total_hogares/n_percentil)
  
  ## Primeros n-1 deciles
  lim_inf <- 1
  lim_sup <- tam_decile
  ing_med_de <- c()
  for (i in 1:(n_percentil-1)) {
    ing_med_de[i] <- sorted_ing %>% filter(cumulative_obs >= lim_inf, cumulative_obs <= lim_sup) %>%
      summarise(mean = sum(ing_cor*factor)/tam_decile) %>% 
      pull()
    
    ## Limites del decil
    lim_inf <- lim_inf + tam_decile
    lim_sup <- lim_sup + tam_decile
  }
  
  ## Decimo decil aparte para incluir el resto de observaciones
  ing_med_de[n_percentil] <- sorted_ing %>% filter(cumulative_obs >= lim_inf) %>%
    summarise(mean = sum(ing_cor*factor)/tam_decile) %>% 
    pull()
  
  return(ing_med_de)
}

n_percentil <- 10
df <- tibble("2012" = ingreso_medio_percentil(concentradohogar2012, n_percentil),
             "2014" = ingreso_medio_percentil(concentradohogar2014, n_percentil),
             "2016" = ingreso_medio_percentil(concentradohogar2016, n_percentil),
             "2018" = ingreso_medio_percentil(concentradohogar2018, n_percentil),
             index = 1:n_percentil)

df <- df %>% gather(key = "Año", value = "Ingreso_medio", -index)

ingreso_medio_decil <- ggplot(df , aes(x = index, y = Ingreso_medio))+
  geom_bar(stat="identity")+
  facet_wrap(~ Año) +
  scale_y_continuous(labels = scales::comma, breaks = seq(0,175000,25000))+
  scale_x_continuous(breaks = 1:10, labels = scales::ordinal)+
  xlab("Decil") + ylab("Ingreso medio") +
  labs(title="Ingreso medio por deciles") +
  theme_bw()

ingreso_medio_decil

Ingreso acumulado en los ultimos 0.1% de los hogares

ingreso_acumulado_percentil <- function(datos, n_percentil){
  # Retorna el porcentaje del ingreso total que acumula cada percentil
  # datos son las base de datos del censo, n_percentil es el numero de divisiones (e.g.: 10 retorna deciles, 100 retorna percentiles)
  
  ## Datos sorteados por ingresos trimestales corrientes más indice de los hogares representados observados
  sorted_ing <- datos %>% select(ing_cor, factor) %>%
    arrange(ing_cor) %>%
    mutate(cumulative_obs = cumsum(factor))
  
  ## Total de hogares representados (ver definicion de factor en documentacion de la ENIGH)
  total_hogares <- sum(datos$factor)
  
  ## Muestras por por percentil truncado para no incluir decimales
  tam_decile <- trunc(total_hogares/n_percentil)
  
  
  ## Primeros n-1 deciles
  lim_inf <- 1
  lim_sup <- tam_decile
  ing_total <- c()
  for (i in 1:(n_percentil-1)) {
    ing_total[i] <- sorted_ing %>% filter(cumulative_obs >= lim_inf, cumulative_obs <= lim_sup) %>%
      summarise(mean = sum(ing_cor*factor)) %>% 
      pull()
    
    ## Limites del decil
    lim_inf <- lim_inf + tam_decile
    lim_sup <- lim_sup + tam_decile
  }
  
  ## Decimo decil aparte para incluir el resto de observaciones
  ing_total[n_percentil] <- sorted_ing %>% filter(cumulative_obs >= lim_inf) %>%
    summarise(mean = sum(ing_cor*factor)) %>% 
    pull()
  
  ing_total <- ing_total/sum(datos$ing_cor*datos$factor)
  
  return(ing_total)
}
n_percentil <- 1000
df <- tibble("2012" = ingreso_acumulado_percentil(concentradohogar2012, n_percentil),
             "2014" = ingreso_acumulado_percentil(concentradohogar2014, n_percentil),
             "2016" = ingreso_acumulado_percentil(concentradohogar2016, n_percentil),
             "2018" = ingreso_acumulado_percentil(concentradohogar2018, n_percentil),
             index = 1:n_percentil)

df <- df %>% gather(key = "Año", value = "Ingreso", -index)

ingreso_acu_per <- ggplot(df, aes(x = index, y = Ingreso)) +
  geom_line(aes(color = Año), size = 1) +
  scale_x_continuous(labels = scales::ordinal) +
  scale_y_continuous(labels = scales::percent) +
  coord_cartesian(xlim=c(990,1000)) +
  ylab("Porcentaje de Ingreso acumulado") + xlab("Percentil (0.1%)") +
  labs(title="Ingreso acumulado por cada 0.1% de la poblacion")
  theme_bw() +
  theme(legend.position = c(.1,.8), legend.background = element_rect(fill = "transparent", colour = "transparent"))
## List of 93
##  $ line                      :List of 6
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                      :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ size         : num 0.5
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                      :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                     : NULL
##  $ aspect.ratio              : NULL
##  $ axis.title                : NULL
##  $ axis.title.x              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top          :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.bottom       : NULL
##  $ axis.title.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.left         : NULL
##  $ axis.title.y.right        :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top           :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.bottom        : NULL
##  $ axis.text.y               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.left          : NULL
##  $ axis.text.y.right         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                :List of 6
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ axis.ticks.x              : NULL
##  $ axis.ticks.x.top          : NULL
##  $ axis.ticks.x.bottom       : NULL
##  $ axis.ticks.y              : NULL
##  $ axis.ticks.y.left         : NULL
##  $ axis.ticks.y.right        : NULL
##  $ axis.ticks.length         : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ axis.ticks.length.x       : NULL
##  $ axis.ticks.length.x.top   : NULL
##  $ axis.ticks.length.x.bottom: NULL
##  $ axis.ticks.length.y       : NULL
##  $ axis.ticks.length.y.left  : NULL
##  $ axis.ticks.length.y.right : NULL
##  $ axis.line                 : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x               : NULL
##  $ axis.line.x.top           : NULL
##  $ axis.line.x.bottom        : NULL
##  $ axis.line.y               : NULL
##  $ axis.line.y.left          : NULL
##  $ axis.line.y.right         : NULL
##  $ legend.background         :List of 5
##   ..$ fill         : chr "transparent"
##   ..$ colour       : chr "transparent"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.margin             : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing            : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing.x          : NULL
##  $ legend.spacing.y          : NULL
##  $ legend.key                :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ legend.key.size           : 'simpleUnit' num 1.2lines
##   ..- attr(*, "unit")= int 3
##  $ legend.key.height         : NULL
##  $ legend.key.width          : NULL
##  $ legend.text               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.align         : NULL
##  $ legend.title              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.align        : NULL
##  $ legend.position           : num [1:2] 0.1 0.8
##  $ legend.direction          : NULL
##  $ legend.justification      : chr "center"
##  $ legend.box                : NULL
##  $ legend.box.just           : NULL
##  $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "unit")= int 1
##  $ legend.box.background     : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing        : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##  $ panel.background          :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : logi NA
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.border              :List of 5
##   ..$ fill         : logi NA
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ panel.spacing             : 'simpleUnit' num 5.5points
##   ..- attr(*, "unit")= int 8
##  $ panel.spacing.x           : NULL
##  $ panel.spacing.y           : NULL
##  $ panel.grid                :List of 6
##   ..$ colour       : chr "grey92"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major          : NULL
##  $ panel.grid.minor          :List of 6
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.5
##   ..$ linetype     : NULL
##   ..$ lineend      : NULL
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ panel.grid.major.x        : NULL
##  $ panel.grid.major.y        : NULL
##  $ panel.grid.minor.x        : NULL
##  $ panel.grid.minor.y        : NULL
##  $ panel.ontop               : logi FALSE
##  $ plot.background           :List of 5
##   ..$ fill         : NULL
##   ..$ colour       : chr "white"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.title.position       : chr "panel"
##  $ plot.subtitle             :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : num 1
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 5.5points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.caption.position     : chr "panel"
##  $ plot.tag                  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 1.2
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ plot.tag.position         : chr "topleft"
##  $ plot.margin               : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
##   ..- attr(*, "unit")= int 8
##  $ strip.background          :List of 5
##   ..$ fill         : chr "grey85"
##   ..$ colour       : chr "grey20"
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ strip.background.x        : NULL
##  $ strip.background.y        : NULL
##  $ strip.placement           : chr "inside"
##  $ strip.text                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey10"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 4.4points 4.4points 4.4points 4.4points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.text.x              : NULL
##  $ strip.text.y              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ strip.switch.pad.grid     : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ strip.switch.pad.wrap     : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ strip.text.y.left         :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE
ingreso_acu_per

Ingreso acumulado por Decil

n_percentil <- 10
df <- tibble("2012" = ingreso_acumulado_percentil(concentradohogar2012, n_percentil),
             "2014" = ingreso_acumulado_percentil(concentradohogar2014, n_percentil),
             "2016" = ingreso_acumulado_percentil(concentradohogar2016, n_percentil),
             "2018" = ingreso_acumulado_percentil(concentradohogar2018, n_percentil),
             index = fct_rev(as.factor(1:n_percentil))) 

df <- df %>% gather(key = "Año", value = "Ingreso", -index)

ingreso_acu_decil <- ggplot(df, aes(fill = index, x = Año, y = Ingreso)) +
  geom_bar(position="stack", stat="identity") +
  ylab("Porcentaje de Ingreso acumulado") + xlab("Año") +
  scale_fill_viridis(discrete = T, name = "Decil") +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  labs(title= "Ingreso acumulado por Decil")+
  theme(legend.background = element_rect(fill = "transparent", colour = "transparent"))

ingreso_acu_decil

Gastan más de lo que ingresan

gastan_de_mas <- function(datos){
  total_hogares <- sum(datos$factor)
  datos %>% 
    transmute(mas_gasto = (gasto_mon > ing_cor)*factor) %>%
    summarise(proporcion = sum(mas_gasto)/total_hogares) %>%
    pull()
}

# Porcentaje de personas que gastan de más
over_spenders <- c(gastan_de_mas(concentradohogar2012),
                   gastan_de_mas(concentradohogar2014),
                   gastan_de_mas(concentradohogar2016),
                   gastan_de_mas(concentradohogar2018))

ggplot()+
  geom_bar(aes(x=c("2012", "2014", "2016", "2018"), y=over_spenders), stat = "identity") +
  scale_y_continuous(labels = scales::percent) +
  xlab("Año") + ylab("")+
  labs(title="Porcentaje personas que gastan más de lo que ingresan")+
  theme_bw()

Datos del SAT

breaks <- c(1,2,3,4,5,6,8,10, 15,20,30,50,100,200,500,1000)*1000000


means <- c()
for (i in 1:(length(breaks)-1)) {
  means[i]<- mean(sat2012$I_DEC_TIAONCT1_AA[sat2012$I_DEC_TIAONCT1_AA >= breaks[i]
                                            & 
                                            sat2012$I_DEC_TIAONCT1_AA<breaks[i+1]], 
                  na.rm = T)
}

top_sat <- ggplot()+
  geom_line(aes(x=1:(length(breaks)-1), y=means), color = "light blue", size = 1) +
  scale_x_continuous(breaks = 1:(length(breaks)-1), labels = c("1-2 M", "2-3 M", "3-4 M", "4-5 M", "5-6 M", "6-8 M", "8-10 M", "10-15 M", "15-20 M", "20-30 M", "30-50 M", "50-100 M", "100-200 M", "200-500 M", "500-1000 M")) +
  scale_y_continuous(labels = scales::comma, breaks=seq(0,600000000,100000000)) +
  labs(title = "Ingreso medio segun ingresos reportados al SAT") +
  xlab("Ingreso medio") + ylab("Rango de ingreso")+
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

top_sat

Estimación Maxima verosimilitud Log-normal 2012

mean_log <- function(x) {
  exp(x[1]+(x[2]^2)/2)
}

pll_log <- function(x){
  -sum(dlnorm(concentradohogar2012$ing_cor+.01, x[1], x[2], log = TRUE)*concentradohogar2012$factor)
}

par_log_NR<- optim(par = c(10, 1), pll_log, control=list(maxit=1000))$par


par_log_R <- solnp(pars = c(11, 2), # Valores inciales
               pll_log, # función a optimizar
               eqfun=mean_log, # función restricción
               eqB=92733.62,   # Valor a igualar
               LB=c(0,0), # Soporte mínimo para los parametros
               UB=c(20,5))$pars # Soporte máximo para los parametros
## 
## Iter: 1 fn: 373023071.8492    Pars:  10.53334  1.83813
## Iter: 2 fn: 369313786.9753    Pars:  10.47885  1.57183
## Iter: 3 fn: 367073387.1403    Pars:  10.55500  1.36961
## Iter: 4 fn: 366549763.1526    Pars:  10.60202  1.29591
## Iter: 5 fn: 366512324.9051    Pars:  10.61045  1.28615
## Iter: 6 fn: 366511843.7034    Pars:  10.61068  1.28593
## Iter: 7 fn: 366511843.4866    Pars:  10.61068  1.28593
## Iter: 8 fn: 366511843.4875    Pars:  10.61068  1.28593
## solnp--> Completed in 8 iterations
## Media muestral
weighted.mean(concentradohogar2012$ing_cor, concentradohogar2012$factor)
## [1] 37999.64
## Media No restringida
mean_log(par_log_NR)
## [1] 37254.33
## Media restringida
mean_log(par_log_R)
## [1] 92733.62

Estimación Maxima verosimilitud GB2 2012

mean_gb2 <- function(x) {
  moment.gb2(1, x[1], x[2], x[3], x[4])
}

pll_gb2 <- function(x){
  -loglp.gb2(concentradohogar2012$ing_cor+.01, x[1], x[2],x[3], x[4], w=concentradohogar2012$factor)
}

par_gb2_NR<- optim(par = c(1.307071, 20594.790235, 2.341514, 1.843004), pll_gb2, method = "BFGS",control=list(maxit=1000))$par

par_gb2_R <- solnp(pars = c(1.5, 30000, 1, 1), # Valores inciales
               pll_gb2, # función a optimizar
               eqfun=mean_gb2, # función restricción
               eqB=92733.62,   # Valor a igualar
               LB=c(0,0,0,0), # Soporte mínimo para los parametros
               UB=c(5,100000,5,5))$pars # Soporte máximo para los parametros
## 
## Iter: 1 fn: 11.5445   Pars:      1.47087 30642.59739     1.32305     1.01227
## Iter: 2 fn: 11.5258   Pars:      1.48804 30640.46754     1.04509     0.94058
## Iter: 3 fn: 11.5258   Pars:      1.48801 30640.47383     1.04545     0.94067
## Iter: 4 fn: 11.5258   Pars:      1.48801 30640.47383     1.04545     0.94067
## solnp--> Completed in 4 iterations
## Media muestral
weighted.mean(concentradohogar2012$ing_cor, concentradohogar2012$factor)
## [1] 37999.64
## Media No restringida
mean_gb2(par_gb2_NR)
## [1] 38249.42
## Media restringida
mean_gb2(par_gb2_R)
## [1] 92733.62

Distribución empírica contra estimadas restringidas

distribucion_empirica <- function(x, datos, factor){
  # x es el valor donde se quiere evaluar la función de distribución empírica
  # datos es el conjunto de datos en formato de vector que se quiere evaluar
  df <- data.frame(datos = datos, factor = factor, cumu = cumsum(factor))
  F_x <- c()
  for (i in 1:length(x)) {
    F_x[i] <- sum(df[df$datos<x[i], "factor"])/df[length(datos), "cumu"]
  }
  return(F_x)
}
distri_empi_restri <- ggplot() +
  geom_function(aes(colour = "Empirica"), 
                fun = distribucion_empirica, 
                args = list(datos = concentradohogar2012$ing_cor, 
                            factor = concentradohogar2012$factor), 
                size = 1) +
  geom_function(aes(colour = "GB2"), 
                fun = pgb2, 
                args = list(shape1 = par_gb2_R[1],
                            scale = par_gb2_R[2],
                            shape2 = par_gb2_R[3],
                            shape3 = par_gb2_R[4]),
                size = 1) +
  geom_function(aes(colour = "Log-Normal"), 
                fun = plnorm, 
                args = list(meanlog = par_log_R[1],
                            sdlog = par_log_R[2]), 
                size = 1) +
  scale_x_continuous(trans = 'log', limits = c(600, 4000000), breaks = 10^(seq(2,6,1)), labels = scales::comma) +
  scale_y_continuous(breaks = seq(0,1,.1)) +
  scale_colour_manual(name="Distribución",values=c("Empirica"="#3591d1", "GB2"="#f04546", "Log-Normal"="#f045d6")) +
  ylab("Probabilidad") + xlab("Ingreso (Escala log)") +
  labs(title = "Distribución empírica contra estimadas con restricción") +
  theme_bw() +
  theme(legend.position = c(.1,.8), legend.background = element_rect(fill = "transparent", colour = "transparent"))

distri_empi_restri

Restringida vs No restringida

p1 <- ggplot() +
  geom_function(aes(colour = "Log-Normal Restringida"), 
                fun = plnorm, 
                args = list(meanlog = par_log_R[1],
                            sdlog = par_log_R[2]), 
                size = 1) +
  geom_function(aes(colour = "Log-Normal No Restringida"), 
                fun = plnorm, 
                args = list(meanlog = par_log_NR[1],
                            sdlog = par_log_NR[2]), 
                size = 1) +
  scale_x_continuous(trans = 'log', limits = c(600, 4000000), breaks = 10^(seq(2,6,1)), labels = scales::comma) +
  scale_y_continuous(breaks = seq(0,1,.1)) +
  scale_colour_manual(name="Distribución",values=c("Log-Normal Restringida"="#f04546", "Log-Normal No Restringida"="#3591d1")) +
  ylab("Probabilidad") + xlab("Ingreso (Escala log)") +
  theme_bw() +
  theme(legend.position = c(.25,.8), legend.background = element_rect(fill = "transparent", colour = "transparent"))

p2 <- ggplot() +
  geom_function(aes(colour = "GB2 Restringida"), 
                fun = pgb2, 
                args = list(shape1 = par_gb2_R[1],
                            scale = par_gb2_R[2],
                            shape2 = par_gb2_R[3],
                            shape3 = par_gb2_R[4]),
                size = 1) +
  geom_function(aes(colour = "GB2 No Restringida"), 
                fun = pgb2, 
                args = list(shape1 = par_gb2_NR[1],
                            scale = par_gb2_NR[2],
                            shape2 = par_gb2_NR[3],
                            shape3 = par_gb2_NR[4]),
                size = 1) +
  scale_x_continuous(trans = 'log', limits = c(600, 4000000), breaks = 10^(seq(2,6,1)), labels = scales::comma) +
  scale_y_continuous(breaks = seq(0,1,.1)) +
  scale_colour_manual(name="Distribución",values=c("GB2 Restringida"="#f04546", "GB2 No Restringida"="#3591d1")) +
  ylab("Probabilidad") + xlab("Ingreso (Escala log)") +
  theme_bw() +
  theme(legend.position = c(.2,.8), legend.background = element_rect(fill = "transparent", colour = "transparent"))

grid.arrange(p1,p2,ncol=2)

Ingreso acumulado por Decil empírico contra estimados

## Calculamos el acumulado por decil mediante simulación
x <- rlnorm(100000, par_log_R[1], par_log_R[2])
deciles <- quantile(x, probs = seq(0,1,.1))
total<-sum(x)

medd <- c()
for (i in 1:10) {
  medd[i] <- sum(x[x > deciles[i] & x <= deciles[i+1]])/total
}

## Construcción de la tabla para alimentar el ggplot
n_percentil <- 10
df <- tibble("ENIGH 2012" = ingreso_acumulado_percentil(concentradohogar2012, n_percentil),
             "Log-Normal" = medd,
             index = fct_rev(as.factor(1:n_percentil))) 

df <- df %>% gather(key = "Fuente", value = "Ingreso", -index)

## Construcciónd de la gráfica
ingreso_acu_decil_ENIGH_Log <- ggplot(df, aes(fill = index, x = Fuente, y = Ingreso)) +
  geom_bar(position="stack", stat="identity", width=0.8) +
  ylab("Porcentaje de Ingreso acumulado") + xlab("Modelo") +
  scale_fill_viridis(discrete = T, name = "Decil") +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  theme(legend.background = element_rect(fill = "transparent", colour = "transparent"))

ingreso_acu_decil_ENIGH_Log

Simulación nube de gas